Installing required packages and libraries
# install required packages/library
install.packages("matlib")
## Installing package into '/home/meutsabdahal/R/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
install.packages("rsample")
## Installing package into '/home/meutsabdahal/R/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
install.packages("ggplot2")
## Installing package into '/home/meutsabdahal/R/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
install.packages("corrplot")
## Installing package into '/home/meutsabdahal/R/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
install.packages("MASS")
## Installing package into '/home/meutsabdahal/R/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
## Warning: package 'MASS' is not available for this version of R
## 'MASS' version 7.3-65 is in the repositories but depends on R (>= 4.4.0)
## 'MASS' version 7.3-65 is in the repositories but depends on R (>= 4.6)
##
## A version of this package for your version of R might be available elsewhere,
## see the ideas at
## https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages
install.packages("gridExtra")
## Installing package into '/home/meutsabdahal/R/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
install.packages("dplyr")
## Installing package into '/home/meutsabdahal/R/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
install.packages("knitr")
## Installing package into '/home/meutsabdahal/R/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
Import required packages/libraries
library(matlib) # for matrix operations
library(ggplot2) # for data visualization
library(rsample) # for data splitting
library(corrplot) # for correlation visualization
## corrplot 0.95 loaded
library(MASS) # for statistical functions
library(gridExtra) # for arranging plots
library(dplyr) # for data manipulation
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(knitr) # for tables
load features dataset
features<-as.matrix(read.csv(file="data/features.csv",header=F))
colnames(features)<-c("x1", "x3", "x4", "x5")
head(features)
## x1 x3 x4 x5
## [1,] 8.34 40.77 1010.84 90.01
## [2,] 23.64 58.49 1011.40 74.20
## [3,] 29.74 56.90 1007.15 41.91
## [4,] 19.07 49.69 1007.22 76.79
## [5,] 11.80 40.66 1017.13 97.20
## [6,] 13.97 39.16 1016.05 84.60
load target dataset
target<-as.matrix(read.csv(file="data/target.csv",header=F))
colnames(target)<-c("X2")
head(target)
## X2
## [1,] 480.48
## [2,] 445.75
## [3,] 438.76
## [4,] 453.09
## [5,] 464.43
## [6,] 470.96
load time series dataset
time<-as.matrix((read.csv(file="data/timeseries.csv",header=F)))
colnames(time)<-c("T1")
head(time)
## T1
## [1,] 1
## [2,] 2
## [3,] 3
## [4,] 4
## [5,] 5
## [6,] 6
Task 1: Preliminary Data Analysis
time series of output signal
# Time series of the output signal, sampled at consecutive integer time steps.
# FIX: with frequency = 1, start = c(min(time), max(time)) is read as
# "period min(time), cycle max(time)" and shifts the series start to
# min(time) + max(time) - 1, mislabelling the whole time axis. The series
# simply begins at the first time index, so pass a single start value.
target.ts <- ts(target, start = min(time), frequency = 1)
plot(target.ts, main = "Time Series - Output Signal", xlab = "Time", ylab = "Output Signal")

Task 1.2: Distribution of Each Signal
# histogram for temperature (x1)
density_x1<-density(features[,"x1"])
hist(features[,"x1"],freq=FALSE, main="Distribution of Temperature (x1)", xlab="Ambient temperature (°C)")
lines(density_x1, lwd=2)

# histogram for ambient pressure (x3)
density_x3<-density(features[,"x3"])
hist(features[,"x3"],freq=FALSE, main="Distribution of Ambient Pressure (x3)", xlab="Atmospheric pressure (millibar)")
lines(density_x3, lwd=2)

# histogram for relative humidity (x4)
density_x4<-density(features[,"x4"])
hist(features[,"x4"],freq=FALSE, main="Distribution of Relative Humidity (x4)", xlab="Humidity level (%)")
lines(density_x4, lwd=2)

# histogram for exhaust vacuum (x5)
density_x5<-density(features[,"x5"])
hist(features[,"x5"],freq=FALSE, main="Distribution of Exhaust Vacuum (x5)", xlab="Vacuum collected from the steam turbine (cm Hg)")
lines(density_x5, lwd=2)

# histogram for net hourly electrical energy output (X2)
density_x2<-density(target[,"X2"])
hist(target[,"X2"],freq=FALSE, main="Distribution of Net hourly electrical energy output (X2)", xlab="Net hourly electrical energy output in MW")
lines(density_x2, lwd=2)

Task 1.3: Correlation and Scatter Plots
# correlation
combined <- cbind(features, target)
correlation<-cor(combined)
print(correlation)
## x1 x3 x4 x5 X2
## x1 1.0000000 0.8441067 -0.50754934 -0.54253465 -0.9481285
## x3 0.8441067 1.0000000 -0.41350216 -0.31218728 -0.8697803
## x4 -0.5075493 -0.4135022 1.00000000 0.09957432 0.5184290
## x5 -0.5425347 -0.3121873 0.09957432 1.00000000 0.3897941
## X2 -0.9481285 -0.8697803 0.51842903 0.38979410 1.0000000
corrplot(correlation)

scatter plot of x1 and X2
plot(features[,"x1"],
target[,"X2"],
main="Temperature vs Net Hourly Electrical Energy Output",
xlab="Temperature (°C)",
ylab="Net Hourly Electrical Energy Output (MW)")

scatter plot of x3 and X2
plot(features[,"x3"],
target[,"X2"],
main="Ambient Pressure vs Net Hourly Electrical Energy Output",
xlab="Ambient Pressure (millibar)",
ylab="Net Hourly Electrical Energy Output (MW)")

scatter plot of x4 and X2
plot(features[,"x4"],
target[,"X2"],
main="Relative Humidity vs Net Hourly Electrical Energy Output",
xlab="Relative Humidity (%)",
ylab="Net Hourly Electrical Energy Output (MW)")

scatter plot of x5 and X2
plot(features[,"x5"],
target[,"X2"],
main="Exhaust Vacuum vs Net Hourly Electrical Energy Output",
xlab="Exhaust Vacuum (cm Hg)",
ylab="Net Hourly Electrical Energy Output (MW)")

Task 2: Regression - Modelling the Relationship Between the Input and Output Signals
# candidate nonlinear polynomial regression models
# Candidate nonlinear polynomial regression structures. Each takes a data
# frame X of raw input signals and a coefficient vector theta, and returns
# the model's predicted output for every row of X.
# NOTE(review): these read theta as c(slope coefficients..., intercept),
# which differs from the c(intercept, coefficients...) layout produced by
# theta_ridge() — confirm the intended convention before mixing the two.
model1 <- function(X, theta) {
  with(X, theta[1] * x4 + theta[2] * x3^2 + theta[3])
}
model2 <- function(X, theta) {
  with(X, theta[1] * x4 + theta[2] * x3^2 + theta[3] * x5 + theta[4])
}
model3 <- function(X, theta) {
  # no intercept term in this candidate
  with(X, theta[1] * x3 + theta[2] * x4 + theta[3] * x5^3)
}
model4 <- function(X, theta) {
  with(X, theta[1] * x4 + theta[2] * x3^2 + theta[3] * x5^3 + theta[4])
}
model5 <- function(X, theta) {
  with(X, theta[1] * x4 + theta[2] * x1^2 + theta[3] * x3^2 + theta[4])
}
Task 2.1: Estimation of model parameters (theta) for every candidate
model using least squares
# convert matrix into dataframe
X<-as.data.frame(features)
# Ridge-regularised least-squares estimate of the model coefficients.
# Prepends an intercept column to X, penalises every coefficient except the
# intercept by `lambda`, and solves the normal equations with the
# Moore-Penrose generalised inverse (robust to near-singular X^T X).
# Returns theta as a column matrix ordered c(intercept, coefficients).
theta_ridge <- function(X, y, lambda = 1e-1) {
  design <- cbind(1, as.matrix(X))
  # zero in the first diagonal slot leaves the intercept unpenalised
  penalty <- lambda * diag(c(0, rep(1, ncol(X))))
  normal_matrix <- t(design) %*% design + penalty
  ginv(normal_matrix) %*% t(design) %*% y
}
# theta for model 1
X_model1 <- data.frame(x4 = X$x4, x3 = X$x3^2)
theta_model1 <- theta_ridge(X_model1, target)
print("Theta Model 1:")
## [1] "Theta Model 1:"
print(theta_model1)
## X2
## [1,] 0.0004686643
## [2,] 0.4775167121
## [3,] -0.0094776540
# theta for model 2
X_model2 <- data.frame(x4 = X$x4, x3 = X$x3^2, x5 = X$x5)
theta_model2 <- theta_ridge(X_model2, target)
print("Theta Model 2:")
## [1] "Theta Model 2:"
print(theta_model2)
## X2
## [1,] 0.0004582797
## [2,] 0.4636853830
## [3,] -0.0089687414
## [4,] 0.1695904901
X_model3 <- data.frame(x3 = X$x3, x4 = X$x4, x5 = X$x5^3)
theta_model3 <- theta_ridge(X_model3, target)
print("Theta for Model 3:")
## [1] "Theta for Model 3:"
print(theta_model3)
## X2
## [1,] 4.295980e-04
## [2,] 2.652240e-02
## [3,] 4.349701e-01
## [4,] 2.778559e-05
X_model4 <- data.frame(x4 = X$x4, x3 = X$x3^2, x5 = X$x5^3)
theta_model4 <- theta_ridge(X_model4, target)
print("Theta for Model 4:")
## [1] "Theta for Model 4:"
print(theta_model4)
## X2
## [1,] 4.621424e-04
## [2,] 4.713285e-01
## [3,] -8.968820e-03
## [4,] 1.066722e-05
X_model5 <- data.frame(x4 = X$x4, x1 = X$x1^2, x3 = X$x3^2)
theta_model5 <- theta_ridge(X_model5, target)
print("Theta for Model 5:")
## [1] "Theta for Model 5:"
print(theta_model5)
## X2
## [1,] 0.000465739
## [2,] 0.474420584
## [3,] -0.033921625
## [4,] -0.003655083
Log-likelihood function
# Gaussian log-likelihood of a fitted model given its residual sum of
# squares `rss` and sample size `n`. The error variance is estimated as
# rss / (n - 1) and plugged into the normal log-likelihood formula.
# Returns a list with the log-likelihood and the variance estimate.
compute_loglikelihood_variance <- function(rss, n) {
  variance <- rss / (n - 1)
  ll <- -(n / 2) * log(2 * pi) - (n / 2) * log(variance) - rss / (2 * variance)
  list(log_likelihood = ll, variance = variance)
}
# log likelihood and variance model1
compute_model1<-compute_loglikelihood_variance(rss_model1, length(target))
model1_log_likelihood<-compute_model1$log_likelihood
model1_variance<-compute_model1$variance
print("Model 1 Log-Likelihood:")
## [1] "Model 1 Log-Likelihood:"
print(model1_log_likelihood)
## [1] -164714.6
print("Model 1 Variance:")
## [1] "Model 1 Variance:"
print(model1_variance)
## [1] 5.253676e+13
# log likelihood and variance model2
compute_model2<-compute_loglikelihood_variance(rss_model2, length(target))
model2_log_likelihood<-compute_model2$log_likelihood
model2_variance<-compute_model2$variance
print("Model 2 Log-Likelihood:")
## [1] "Model 2 Log-Likelihood:"
print(model2_log_likelihood)
## [1] -164433.3
print("Model 2 Variance:")
## [1] "Model 2 Variance:"
print(model2_variance)
## [1] 4.953723e+13
# log likelihood and variance model3
compute_model3<-compute_loglikelihood_variance(rss_model3, length(target))
model3_log_likelihood<-compute_model3$log_likelihood
model3_variance<-compute_model3$variance
print("Model 3 Log-Likelihood:")
## [1] "Model 3 Log-Likelihood:"
print(model3_log_likelihood)
## [1] -389154.6
print("Model 3 Variance:")
## [1] "Model 3 Variance:"
print(model3_variance)
## [1] 1.245287e+34
# log likelihood and variance model4
compute_model4<-compute_loglikelihood_variance(rss_model4, length(target))
model4_log_likelihood<-compute_model4$log_likelihood
model4_variance<-compute_model4$variance
print("Model 4 Log-Likelihood:")
## [1] "Model 4 Log-Likelihood:"
print(model4_log_likelihood)
## [1] -352016.2
print("Model 4 Variance:")
## [1] "Model 4 Variance:"
print(model4_variance)
## [1] 5.294457e+30
# log likelihood and variance model5
compute_model5<-compute_loglikelihood_variance(rss_model5, length(target))
model5_log_likelihood<-compute_model5$log_likelihood
model5_variance<-compute_model5$variance
print("Model 5 Log-Likelihood:")
## [1] "Model 5 Log-Likelihood:"
print(model5_log_likelihood)
## [1] -135847.9
print("Model 5 Variance")
## [1] "Model 5 Variance"
print(model5_variance)
## [1] 1.25871e+11
Compute AIC and BIC
# calculate aic
# Akaike Information Criterion: AIC = 2k - 2*logL, where `k` is the number
# of estimated parameters. Lower values indicate a better trade-off between
# goodness of fit and model complexity.
compute_aic <- function(log_likelihood, k) {
  2 * k - 2 * log_likelihood
}
# calculate bic
# Bayesian Information Criterion: BIC = k*log(n) - 2*logL, where `k` is the
# number of estimated parameters and `n` the sample size. Penalises model
# complexity more heavily than AIC once n > e^2.
compute_bic <- function(log_likelihood, k, n) {
  k * log(n) - 2 * log_likelihood
}
# aic and bic model 1
model1_k<-length(theta_model1)
model1_aic<-compute_aic(model1_log_likelihood, model1_k)
model1_bic<-compute_bic(model1_log_likelihood, model1_k, length(target))
print("Model 1 AIC:")
## [1] "Model 1 AIC:"
print(model1_aic)
## [1] 329435.2
print("Model 1 BIC:")
## [1] "Model 1 BIC:"
print(model1_bic)
## [1] 329456.7
# aic and bic model 2
model2_k<-length(theta_model2)
model2_aic<-compute_aic(model2_log_likelihood, model2_k)
model2_bic<-compute_bic(model2_log_likelihood, model2_k, length(target))
print("Model 2 AIC:")
## [1] "Model 2 AIC:"
print(model2_aic)
## [1] 328874.7
print("Model 2 BIC:")
## [1] "Model 2 BIC:"
print(model2_bic)
## [1] 328903.4
# aic and bic model 3
model3_k<-length(theta_model3)
model3_aic<-compute_aic(model3_log_likelihood, model3_k)
model3_bic<-compute_bic(model3_log_likelihood, model3_k, length(target))
print("Model 3 AIC:")
## [1] "Model 3 AIC:"
print(model3_aic)
## [1] 778317.3
print("Model 3 BIC:")
## [1] "Model 3 BIC:"
print(model3_bic)
## [1] 778345.9
# aic and bic model 4
model4_k<-length(theta_model4)
model4_aic<-compute_aic(model4_log_likelihood, model4_k)
model4_bic<-compute_bic(model4_log_likelihood, model4_k, length(target))
print("Model 4 AIC:")
## [1] "Model 4 AIC:"
print(model4_aic)
## [1] 704040.4
print("Model 4 BIC:")
## [1] "Model 4 BIC:"
print(model4_bic)
## [1] 704069.1
# aic and bic model 5
model5_k<-length(theta_model5)
model5_aic<-compute_aic(model5_log_likelihood, model5_k)
model5_bic<-compute_bic(model5_log_likelihood, model5_k, length(target))
print("Model 5 AIC:")
## [1] "Model 5 AIC:"
print(model5_aic)
## [1] 271703.8
print("Model 5 BIC:")
## [1] "Model 5 BIC:"
print(model5_bic)
## [1] 271732.4
Task 2.5: Distribution of model prediction errors
# Residuals of a candidate model: observed response `y` minus the model's
# prediction for the rows of X under coefficients `theta`. `model` is one
# of the candidate model functions model1..model5.
compute_model_prediction_error <- function(X, y, theta, model) {
  y - model(X, theta)
}
# QQ plot for model 1 residual distribution
# FIX: theta_ridge() returns theta ordered c(intercept, coef_x4, coef_x3sq),
# and X_model1 already contains the squared x3 column. Calling
# model1(X_model1, theta_model1) both re-squared x3 (giving x3^4) and read
# the coefficients in the wrong slots, so the residuals came from a
# different model than the one fitted. Predict directly from the design
# matrix the theta was estimated on.
prediction_error_model1 <- target - cbind(1, as.matrix(X_model1)) %*% theta_model1
qqnorm(prediction_error_model1, main = "Q-Q Plot of Prediction Error for Model 1")
qqline(prediction_error_model1)

# QQ plot for model 2 residual distribution
# FIX: theta_ridge() returns c(intercept, coefficients) and X_model2 already
# holds the transformed x3^2 column, so applying model2() here re-squared x3
# and misread the coefficient order. Predict from the fitted design matrix.
prediction_error_model2 <- target - cbind(1, as.matrix(X_model2)) %*% theta_model2
qqnorm(prediction_error_model2, main = "Q-Q Plot of Prediction Error for Model 2")
qqline(prediction_error_model2)

# QQ plot for model 3 residual distribution
# FIX: theta_ridge() returns c(intercept, coefficients) and X_model3 already
# holds the cubed x5 column, so applying model3() here cubed it again
# (x5^9) and misread the coefficient order. Predict from the fitted design
# matrix instead.
prediction_error_model3 <- target - cbind(1, as.matrix(X_model3)) %*% theta_model3
qqnorm(prediction_error_model3, main = "Q-Q Plot of Prediction Error for Model 3")
qqline(prediction_error_model3)

# QQ plot for model 4 residual distribution
# FIX: theta_ridge() returns c(intercept, coefficients) and X_model4 already
# holds x3^2 and x5^3, so applying model4() here re-transformed both columns
# and misread the coefficient order. Predict from the fitted design matrix.
prediction_error_model4 <- target - cbind(1, as.matrix(X_model4)) %*% theta_model4
qqnorm(prediction_error_model4, main = "Q-Q Plot of Prediction Error for Model 4")
qqline(prediction_error_model4)

# QQ plot for model 5 residual distribution
# FIX: theta_ridge() returns c(intercept, coefficients) and X_model5 already
# holds x1^2 and x3^2, so applying model5() here re-squared both columns and
# misread the coefficient order. Predict from the fitted design matrix.
prediction_error_model5 <- target - cbind(1, as.matrix(X_model5)) %*% theta_model5
qqnorm(prediction_error_model5, main = "Q-Q Plot of Prediction Error for Model 5")
qqline(prediction_error_model5)

Task 2.7: Train/Test Split
set.seed(42) # for reproducibility
# Create the full design matrix for Model 5
# NOTE: the columns are named x4/x1/x3 but actually hold x4, x1^2 and x3^2,
# matching the transformed regressors of candidate model 5.
X_model5 <- data.frame(
x4 = X$x4,
x1 = X$x1^2,
x3 = X$x3^2
)
y <- target
# --- 1. Split into training and testing sets (70/30) ---
n <- nrow(X_model5)
# sample 70% of the row indices (without replacement) for training;
# the remaining 30% form the held-out test set
train_idx <- sample(1:n, size = round(0.7 * n))
test_idx <- setdiff(1:n, train_idx)
X_train <- X_model5[train_idx, ]
y_train <- y[train_idx, , drop = FALSE] # drop = FALSE keeps the matrix shape
X_test <- X_model5[test_idx, ]
y_test <- y[test_idx, , drop = FALSE]
# --- 2. Estimate theta on training data ---
library(MASS)
# Ridge least-squares estimator (re-definition of the Task 2.1 helper so
# this section can run standalone). Adds an intercept column, applies an L2
# penalty `lambda` to every non-intercept coefficient, and solves the
# normal equations with the Moore-Penrose pseudo-inverse.
# Returns theta as a column matrix ordered c(intercept, coefficients).
theta_ridge <- function(X, y, lambda = 1e-1) {
  Z <- cbind(1, as.matrix(X))
  penalty <- lambda * diag(c(0, rep(1, ncol(X))))
  ginv(t(Z) %*% Z + penalty) %*% t(Z) %*% y
}
# Fit the ridge model on the training split only.
theta_hat <- theta_ridge(X_train, y_train)
# --- 3. Make predictions on the test set ---
# theta_hat is ordered c(intercept, coefficients), so prepend a 1s column.
X_test_bias <- cbind(1, as.matrix(X_test))
y_pred <- X_test_bias %*% theta_hat
# --- 4. Compute 95% confidence intervals ---
# Residual standard error from training data
X_train_bias <- cbind(1, as.matrix(X_train))
y_train_pred <- X_train_bias %*% theta_hat
residuals_train <- y_train - y_train_pred
rss <- sum(residuals_train^2)
# degrees of freedom: training rows minus number of fitted parameters
df <- nrow(X_train) - length(theta_hat)
sigma2 <- rss / df
# Standard errors of predictions
# NOTE(review): 1e-1 repeats theta_ridge()'s default lambda; keep the two in
# sync if the penalty changes. This sandwich uses the OLS-style covariance
# with the penalised inverse — an approximation for ridge, not the exact
# ridge covariance; confirm it is acceptable for this coursework.
XtX_inv <- ginv(t(X_train_bias) %*% X_train_bias + 1e-1 * diag(c(0, rep(1, ncol(X_train)))))
se_pred <- sqrt(diag(X_test_bias %*% XtX_inv %*% t(X_test_bias)) * sigma2)
# 95% confidence intervals
# 1.96 is the two-sided 95% normal quantile (large-sample approximation).
lower_bound <- y_pred - 1.96 * se_pred
upper_bound <- y_pred + 1.96 * se_pred
# --- 5. Plot predictions, confidence intervals, and actual values ---
plot(y_test, type = 'p', col = 'black', pch = 16,
ylab = "Target", xlab = "Test Sample Index", main = "Model 5 Prediction with 95% CI")
points(y_pred, col = 'blue', pch = 17)
# vertical error bars: flat-capped arrows from lower to upper bound
arrows(1:length(y_pred), lower_bound, 1:length(y_pred), upper_bound,
length = 0.05, angle = 90, code = 3, col = 'red')
legend("topleft", legend = c("True y", "Predicted y", "95% CI"),
col = c("black", "blue", "red"), pch = c(16, 17, NA), lty = c(NA, NA, 1))

install.packages("gridExtra")
## Installing package into '/home/meutsabdahal/R/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
set.seed(42)
# --- 1) Prepare ---
# Your estimated coefficients from Task 2 (ridge regression)
theta_hat_vec <- as.vector(theta_hat) # includes intercept + 3 params
names(theta_hat_vec) <- c("intercept", colnames(X_train))
# Find indices of 2 largest absolute coefficients (excluding intercept)
abs_coefs <- abs(theta_hat_vec[-1])
top2_idx <- order(abs_coefs, decreasing = TRUE)[1:2]
param_names <- names(abs_coefs)[top2_idx]
cat("Selected parameters for ABC:", param_names, "\n")
## Selected parameters for ABC: x4 x1
# Fix other parameters at estimated values
fixed_params <- theta_hat_vec[-c(1, top2_idx + 1)] # exclude intercept and top2
# --- 2) Define simulation function based on model ---
# Simulate a response vector from the model-5 linear structure:
# mu = intercept + theta2*x4 + theta3*x1col + theta4*x3col, then add
# i.i.d. Gaussian noise with variance sigma_sq.
# NOTE: the x1/x3 columns of X_data already hold the squared signals, so the
# coefficients multiply the columns directly — no further transformation.
simulate_y <- function(X_data, theta_vec, sigma_sq) {
  linear_pred <- theta_vec[1] +
    theta_vec[2] * X_data$x4 +
    theta_vec[3] * X_data$x1 +
    theta_vec[4] * X_data$x3
  rnorm(nrow(X_data), mean = linear_pred, sd = sqrt(sigma_sq))
}
# --- 3) Define prior sampler for the 2 params ---
# Get estimated values for the two parameters
theta_1_hat <- theta_hat_vec[top2_idx[1] + 1] # +1 for intercept offset
theta_2_hat <- theta_hat_vec[top2_idx[2] + 1]
# Define ±10% prior range around estimated values
prior_range_1 <- c(theta_1_hat * 0.9, theta_1_hat * 1.1)
prior_range_2 <- c(theta_2_hat * 0.9, theta_2_hat * 1.1)
# Use estimated sigma_sq from training residuals
sigma_sq_hat <- sigma2
# Draw one candidate parameter pair for ABC from independent uniform priors
# over prior_range_1 / prior_range_2 (file-level globals set above). The
# noise variance is held fixed at the training estimate sigma_sq_hat.
prior_sampler <- function() {
  draw <- function(rng) runif(1, min = min(rng), max = max(rng))
  list(
    theta_1 = draw(prior_range_1),
    theta_2 = draw(prior_range_2),
    sigma_sq = sigma_sq_hat
  )
}
# --- 4) Calculate summary statistics ---
# Summary statistics used to compare simulated and observed responses in
# ABC: the sample mean and sample variance, as a named numeric vector.
calc_summary_stats <- function(y_vec) {
  stats <- c(mean(y_vec), var(y_vec))
  names(stats) <- c("mean", "var")
  stats
}
S_obs <- calc_summary_stats(y_train)
# --- 5) Distance metric ---
# Euclidean distance between two summary-statistic vectors; small values
# mean the simulated data resembles the observed data.
distance_metric <- function(S_sim, S_obs) {
  delta <- S_sim - S_obs
  sqrt(sum(delta * delta))
}
# --- 6) ABC rejection ---
N_sim <- 10000 # number of simulations (increase for better posterior)
pilot_runs <- 1000 # for epsilon selection
pilot_distances <- numeric(pilot_runs)
cat("Pilot run to select epsilon...\n")
## Pilot run to select epsilon...
# Pilot stage: sample from the prior `pilot_runs` times to learn the scale
# of the summary-statistic distances, then set the ABC tolerance epsilon
# from their empirical distribution.
for (i in 1:pilot_runs) {
sample_params <- prior_sampler()
# Build full theta vector: intercept + top2 params + fixed params
theta_full <- numeric(length(theta_hat_vec))
theta_full[1] <- theta_hat_vec[1] # intercept fixed
theta_full[top2_idx[1] + 1] <- sample_params$theta_1
theta_full[top2_idx[2] + 1] <- sample_params$theta_2
# fixed params fill others
fixed_names <- names(fixed_params)
for (name in fixed_names) {
pos <- which(names(theta_hat_vec) == name)
theta_full[pos] <- fixed_params[name]
}
# Simulate a response under the candidate theta and score its distance
# from the observed training summaries.
y_sim <- simulate_y(X_train, theta_full, sample_params$sigma_sq)
S_sim <- calc_summary_stats(y_sim)
pilot_distances[i] <- distance_metric(S_sim, S_obs)
}
# Guard against degenerate simulations (NaN/Inf distances) before taking
# the quantile; epsilon is the 5th percentile of the finite distances.
finite_dists <- pilot_distances[is.finite(pilot_distances)]
if (length(finite_dists) == 0) stop("No finite distances in pilot run!")
epsilon <- quantile(finite_dists, 0.05)
cat("Chosen epsilon (5th percentile):", epsilon, "\n")
## Chosen epsilon (5th percentile): 10.56309
# --- 7) Rejection ABC ---
accepted_thetas <- matrix(NA, nrow = N_sim, ncol = 2)
accepted_count <- 0
cat("Starting ABC rejection sampling...\n")
## Starting ABC rejection sampling...
pb <- txtProgressBar(min = 0, max = N_sim, style = 3)
## | | | 0%
# Main ABC rejection loop: repeat the pilot procedure N_sim times, but keep
# only parameter draws whose simulated summaries fall within epsilon of the
# observed ones. Accepted draws approximate the posterior of the two
# selected parameters.
for (i in 1:N_sim) {
sample_params <- prior_sampler()
# Assemble the full coefficient vector: intercept and non-selected
# coefficients stay fixed at the ridge estimates; only the two selected
# parameters vary.
theta_full <- numeric(length(theta_hat_vec))
theta_full[1] <- theta_hat_vec[1]
theta_full[top2_idx[1] + 1] <- sample_params$theta_1
theta_full[top2_idx[2] + 1] <- sample_params$theta_2
for (name in fixed_names) {
pos <- which(names(theta_hat_vec) == name)
theta_full[pos] <- fixed_params[name]
}
y_sim <- simulate_y(X_train, theta_full, sample_params$sigma_sq)
S_sim <- calc_summary_stats(y_sim)
dist <- distance_metric(S_sim, S_obs)
# Accept the draw only if the simulated summaries are close enough.
if (is.finite(dist) && dist < epsilon) {
accepted_count <- accepted_count + 1
accepted_thetas[accepted_count, ] <- c(sample_params$theta_1, sample_params$theta_2)
}
setTxtProgressBar(pb, i)
}
## | | | 1% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |== | 4% | |=== | 4% | |=== | 5% | |==== | 5% | |==== | 6% | |===== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 9% | |======= | 10% | |======= | 11% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========= | 14% | |========== | 14% | |========== | 15% | |=========== | 15% | |=========== | 16% | |============ | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 19% | |============== | 20% | |============== | 21% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================ | 24% | |================= | 24% | |================= | 25% | |================== | 25% | |================== | 26% | |=================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 28% | |==================== | 29% | |===================== | 29% | |===================== | 30% | |===================== | 31% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================= | 34% | |======================== | 34% | |======================== | 35% | |========================= | 35% | |========================= | 36% | |========================== | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 39% | |============================ | 40% | |============================ | 41% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |============================== | 44% | |=============================== | 44% | |=============================== | 45% | |================================ | 45% | 
|================================ | 46% | |================================= | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | |=================================== | 49% | |=================================== | 50% | |=================================== | 51% | |==================================== | 51% | |==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |===================================== | 54% | |====================================== | 54% | |====================================== | 55% | |======================================= | 55% | |======================================= | 56% | |======================================== | 56% | |======================================== | 57% | |======================================== | 58% | |========================================= | 58% | |========================================= | 59% | |========================================== | 59% | |========================================== | 60% | |========================================== | 61% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================ | 64% | |============================================= | 64% | |============================================= | 65% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 68% | |================================================ | 
69% | |================================================= | 69% | |================================================= | 70% | |================================================= | 71% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | |=================================================== | 74% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 75% | |===================================================== | 76% | |====================================================== | 76% | |====================================================== | 77% | |====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 79% | |======================================================== | 80% | |======================================================== | 81% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |========================================================== | 84% | |=========================================================== | 84% | |=========================================================== | 85% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 86% | 
|============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 89% | |=============================================================== | 90% | |=============================================================== | 91% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | |================================================================= | 93% | |================================================================= | 94% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 96% | |==================================================================== | 97% | |==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 99% | |======================================================================| 100%
close(pb)
# Keep only the rows actually filled during rejection sampling.
# FIX: the original used 1:accepted_count, which evaluates to c(1, 0) when
# accepted_count == 0 and would wrongly keep row 1; seq_len() returns an
# empty index in that case and trims correctly.
accepted_thetas <- accepted_thetas[seq_len(accepted_count), , drop = FALSE]
cat("Number of accepted samples:", accepted_count, "\n")
## Number of accepted samples: 589
# --- 8) Plot results ---
library(ggplot2)
library(gridExtra)
# Posterior samples accepted by ABC, one column per inferred parameter.
df_post <- data.frame(
  param1 = accepted_thetas[, 1],
  param2 = accepted_thetas[, 2]
)
# Marginal posterior of the first selected parameter; the dashed red line
# marks the ridge point estimate from Task 2.
# FIX: `..density..` was deprecated in ggplot2 3.4.0 (the rendered warning
# confirms this) — use after_stat(density) instead.
p1 <- ggplot(df_post, aes(x = param1)) +
  geom_histogram(aes(y = after_stat(density)), bins = 30, fill = "skyblue", color = "black") +
  geom_vline(xintercept = theta_1_hat, color = "red", linetype = "dashed") +
  labs(title = paste("Posterior of", param_names[1]), x = param_names[1])
# Marginal posterior of the second selected parameter.
p2 <- ggplot(df_post, aes(x = param2)) +
  geom_histogram(aes(y = after_stat(density)), bins = 30, fill = "lightgreen", color = "black") +
  geom_vline(xintercept = theta_2_hat, color = "red", linetype = "dashed") +
  labs(title = paste("Posterior of", param_names[2]), x = param_names[2])
# Joint posterior scatter with the point estimates as dashed cross-hairs.
p_joint <- ggplot(df_post, aes(x = param1, y = param2)) +
  geom_point(alpha = 0.4, color = "blue") +
  geom_vline(xintercept = theta_1_hat, linetype = "dashed", color = "red") +
  geom_hline(yintercept = theta_2_hat, linetype = "dashed", color = "red") +
  labs(title = "Joint Posterior Distribution", x = param_names[1], y = param_names[2])
# Arrange the two marginals and the joint plot on a 2-column grid.
grid.arrange(p1, p2, p_joint, ncol = 2)
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
